# ------------------------------------------------------------------------------
# Tamaki & Jung 2025
# The Non-linearity of Populist Attitudes and Political Ideology
# Replication Code for PSRM
# ------------------------------------------------------------------------------
# Log file 
sink("Tamaki_and_Jung_replication_log.txt", split = TRUE)
setwd("C:/Users/yujin/Dropbox/Cowork-Populist Attitudes/PSRM - R and R/Final Code")
# PREAMBLE ---------------------------------------------------------------------
# Clear Environment
rm(list = ls())
gc()

# Load Required Packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, mgcv, sandwich, rio, stargazer)

# Load Data
load("df_final(11.24).RData")

# Data Preparation -------------------------------------------------------------
df2 <- df %>%
  select(pop_add_m, pop_add, ideo, ext2,
         age, gen, edu, empl, 
         corrupt_ind, econ_ind, pol_int,
         socecon_status, occ_status,
         country_name) %>%
  mutate(
    country_name = factor(country_name),
    socecon_status = case_when(
      socecon_status == 1 ~ "White Collar",
      socecon_status == 2 ~ "Worker",
      socecon_status == 3 ~ "Farmer",
      socecon_status == 4 ~ "Self-employed",
      TRUE ~ NA_character_
    ),
    socecon_status = relevel(factor(socecon_status), ref = "White Collar")
  )

# Descriptive Statistics Table (Table 1) ---------------------------------------
# Item labels
item_labels <- c(
  "What people call compromise in politics is really just selling out on one's principles.",
  "Most politicians do not care about the people.",
  "Politicians are the main problem in [country].",
  "Most politicians care only about the interests of the rich and powerful.",
  "The people, and not politicians, should make our most important policy decisions.",
  ""
)

# Subset data for selected countries
pop_tbl <- df %>%
  filter(country_name %in% c(1:24, 26:47, 50:55)) %>%
  select(m1, ae1, ae3, ae4, pc1)

# Compute descriptive statistics
descs_mean <- pop_tbl %>%
  drop_na() %>%
  summarise(across(everything(), mean)) %>%
  t()

descs_sd <- pop_tbl %>%
  drop_na() %>%
  summarise(across(everything(), sd)) %>%
  t()

item_n <- colSums(!is.na(pop_tbl))

# Combine into table
pop_descs <- data.frame(
  Item = item_labels,
  N = c(item_n, nrow(pop_tbl)),
  Mean = c(round(descs_mean, 2), ""),
  Sd = c(round(descs_sd, 2), "")
)

# Print Table
stargazer::stargazer(
  pop_descs, summary = FALSE, digits = 2,
  title = "Descriptive Statistics for the Populist Attitudes Variable",
  label = "tab:dspa",
  header = FALSE,
  type = "text",
  out = "Table1_DescriptiveStats.txt"
)
# ------------------------------------------------------------------------------
# Table 2: Generalized Additive Model of Populist Attitudes by Ideology
# Replication Code for PSRM
# ------------------------------------------------------------------------------
# GAM Estimation (Model 1) -----------------------------------------------------
gam_m1.3 <- gam(
  pop_add_m ~ s(ideo) + 
    age + gen + edu + empl + 
    corrupt_ind + econ_ind +
    factor(country_name), 
  method = 'REML',
  data = df2
)

summary(gam_m1.3)

# Diagnostics ------------------------------------------------------------------
# Check if residuals deviate from normality
gam.check(gam_m1.3)  

# Check for concurvity (nonlinear multicollinearity among smooth terms)
concurvity(gam_m1.3, full = TRUE)

# Visualization: Figure 1 -----------------------------------------------------

# Plot smoothed effect of ideology on populist attitudes
png("Figure1.png", width = 800, height = 600)
plot(
  gam_m1.3,
  select = 1,                  # s(ideo)
  seWithMean = TRUE,
  shift = coef(gam_m1.3)[1],   # Shift y-axis to intercept level
  shade = TRUE,
  ylim = c(2.65, 3),
  xlab = "Ideology",
  ylab = "Populist Attitudes", 
  cex.lab = 1.2,
  cex.axis = 1,
  rug = TRUE
)

dev.off()

# Check: Political Interest and Ideology ---------------------------------------

# Drop unreliable political interest responses (codes 7–9)
df2_pi <- df2 %>%
  mutate(pol_int = ifelse(pol_int %in% c(7:9), NA, pol_int))

# Count political interest by ideology
ideo_df <- df2_pi %>%
  count(ideo, pol_int, name = "count")

# Define uninterested as pol_int == 3 or 4
# Compare respondents at ideology = 5 to all others

# Group: Ideology = 5
ideo_5 <- ideo_df %>% filter(ideo == 5)
unint_ideo_5 <- sum(ideo_5$count[ideo_5$pol_int %in% c(3, 4)])
total_ideo_5 <- sum(ideo_5$count)

# Group: Ideology ≠ 5
other_ideo <- ideo_df %>% filter(ideo != 5)
unint_other_ideo <- sum(other_ideo$count[other_ideo$pol_int %in% c(3, 4)])
total_other_ideo <- sum(other_ideo$count)

# Two-proportion z-test
prop.test(
  x = c(unint_ideo_5, unint_other_ideo),
  n = c(total_ideo_5, total_other_ideo),
  alternative = "two.sided"
)

# ------------------------------------------------------------------------------
# Re-estimated Model 1: Excluding Ideology = 5 with Low Political Interest
# Figure 2 and Table 2
# ------------------------------------------------------------------------------
# Filter Data: Exclude respondents with ideology == 5 and low interest (3, 4)
df2.2 <- df2 %>%
  filter(!(ideo == 5 & pol_int %in% c(3, 4)))

# Estimate Model
re.gam_m1.3 <- gam(
  pop_add_m ~ s(ideo) +
    age + gen + edu + empl +
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2.2
)

summary(re.gam_m1.3)

# Diagnostics ------------------------------------------------------------------

# Check residuals and fit
gam.check(re.gam_m1.3)

# Check concurvity (nonlinear multicollinearity)
concurvity(re.gam_m1.3, full = TRUE)

# Visualization: Figure 2 ------------------------------------------------------
png("Figure2.png", width = 800, height = 600)
plot(
  re.gam_m1.3,
  select = 1,
  seWithMean = TRUE,
  shift = coef(re.gam_m1.3)[1],
  shade = TRUE,
  ylim = c(2.65, 3),
  xlab = "Ideology",
  ylab = "Populist Attitudes",
  cex.lab = 1.2,
  cex.axis = 1,
  rug = TRUE
)

dev.off()

# Table 2: Compare Model 1 and Re-estimated Model -------------------------------
stargazer::stargazer(
  gam_m1.3, re.gam_m1.3,
  type = "text",
  out = "Table2.txt",
  odd.ratio = TRUE,
  dep.var.labels = "Populist Attitudes",
  keep = c("age", "gen", "edu", "empl", "corrupt_ind", "econ_ind", "Constant"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  star.char = c("+", "*", "**", "***"),
  ci = TRUE,
  notes = "Odds with 95% Confidence Intervals in parentheses. + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
  notes.append = FALSE,
  align = TRUE
)

# ------------------------------------------------------------------------------
# Table 3: GAM of Populist Attitudes and Ideological Extremism
# Main and Re-estimated Models, Figures 3 & 4
# ------------------------------------------------------------------------------
# Model 2: Main Model with Full Sample ----------------------------------------
gam_m2.3 <- gam(
  pop_add_m ~ s(ext2) +
    age + gen + edu + empl + 
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2
)

summary(gam_m2.3)

# Diagnostics ------------------------------------------------------------------
# Check residual distribution and smoothness assumptions
gam.check(gam_m2.3)

# Check concurvity (nonlinear collinearity)
concurvity(gam_m2.3, full = TRUE)

# Figure 3: Smoothed Effect of Extremism --------------------------------------
png("Figure3.png", width = 800, height = 600)
plot(
  gam_m2.3,
  select = 1,
  seWithMean = TRUE,
  shift = coef(gam_m2.3)[1],  
  shade = TRUE,
  ylim = c(2.75, 3.05),
  xlab = "Ideological Extremism",
  ylab = "Populist Attitudes",
  cex.lab = 1.2,
  cex.axis = 1,
  rug = TRUE
)
dev.off()

# Model 2: Re-estimated Model with Filtered Sample -----------------------------

re.gam_m2.3 <- gam(
  pop_add_m ~ s(ext2) +
    age + gen + edu + empl + 
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2.2
)

summary(re.gam_m2.3)

# Diagnostics ------------------------------------------------------------------
gam.check(re.gam_m2.3)
concurvity(re.gam_m2.3, full = TRUE)

# Figure 4: Smoothed Effect in Re-estimated Model ------------------------------
png("Figure4.png", width = 800, height = 600)
plot(
  re.gam_m2.3,
  select = 1,
  seWithMean = TRUE,
  shift = coef(re.gam_m2.3)[1],
  shade = TRUE,
  ylim = c(2.75, 3.05),
  xlab = "Ideological Extremism",
  ylab = "Populist Attitudes",
  cex.lab = 1.2,
  cex.axis = 1,
  rug = TRUE
)

dev.off()

# Table 3: Compare Models with and without Ideological Center ------------------
stargazer::stargazer(
  gam_m2.3, re.gam_m2.3,
  type = "text",
  out = "Table3.txt",
  odd.ratio = TRUE,
  dep.var.labels = "Populist Attitudes",
  keep = c("age", "gen", "edu", "empl", "corrupt_ind", "econ_ind", "Constant"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  star.char = c("+", "*", "**", "***"),
  ci = TRUE,
  notes = "Odds with 95% Confidence Intervals in parentheses. + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
  notes.append = FALSE,
  align = TRUE
)
# ------------------------------------------------------------------------------
# Robustness Checks: Functional Form Tests for Ideology and Extremism
# Tables 4 and 5
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Table 4: Ideology – Comparing GAM vs Linear vs Quadratic Models
# ------------------------------------------------------------------------------
# GAM with smooth term for ideology (Model 1.2)
re.gam_m1.3 <- gam(
  pop_add_m ~ s(ideo) +
    age + gen + edu + empl +
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2.2
)

# Linear ideology model (Model 1.3)
re.gam_m1.3.lin <- gam(
  pop_add_m ~ ideo +
    age + gen + edu + empl +
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2.2
)

# Quadratic ideology model (Model 1.4) – using raw polynomial terms
re.gam_m1.3.quad <- gam(
  pop_add_m ~ ideo + I(ideo^2) +
    age + gen + edu + empl +
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2.2
)

# Compare model fit: AIC and BIC
AIC(re.gam_m1.3.lin, re.gam_m1.3.quad, re.gam_m1.3)
BIC(re.gam_m1.3.lin, re.gam_m1.3.quad, re.gam_m1.3)

# ------------------------------------------------------------------------------
# Table 5: Extremism – Comparing GAM vs Linear vs Exponential Models
# ------------------------------------------------------------------------------
# GAM with smooth term for extremism (Model 2.2)
re.gam_m2.3 <- gam(
  pop_add_m ~ s(ext2) +
    age + gen + edu + empl +
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2.2
)

# Linear extremism model (Model 2.3)
re.gam_m2.3.lin <- gam(
  pop_add_m ~ ext2 +
    age + gen + edu + empl +
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2.2
)

# Exponential extremism model (Model 2.4)
re.gam_m2.3.exp <- gam(
  pop_add_m ~ ext2 + exp(ext2) +
    age + gen + edu + empl +
    corrupt_ind + econ_ind +
    factor(country_name),
  method = "REML",
  data = df2.2
)

# Table Output: Linear vs Exponential
stargazer::stargazer(
  re.gam_m2.3.lin, re.gam_m2.3.exp,
  type = "text",
  out = "Table5.txt",
  odd.ratio = TRUE,
  dep.var.labels = "Populist Attitudes",
  keep = c("ext2", "age", "gen", "edu", "empl", "corrupt_ind", "econ_ind", "Constant"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  star.char = c("+", "*", "**", "***"),
  ci = TRUE,
  align = TRUE,
  notes = "Odds with 95% Confidence Intervals in parentheses. + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
  notes.append = FALSE
)

# Compare model fit: AIC and BIC
AIC(re.gam_m2.3.lin, re.gam_m2.3.exp, re.gam_m2.3)
BIC(re.gam_m2.3.lin, re.gam_m2.3.exp, re.gam_m2.3)

# ------------------------------------------------------------------------------
# Table 6: Conditional Effects of Ideology by Type of Populist Support
# ------------------------------------------------------------------------------
# Load Country-level Populist Support Labels -----------------------------------
df_c <- import("pop_supply.csv")  
df_c$id <- as.factor(df_c$id)

# Merge with respondent-level data
df3 <- df2.2 %>%
  left_join(df_c, by = c("country_name" = "id"))

# Create Subsets by Populist Type ---------------------------------------------
df3_right  <- df3 %>% filter(pop_support_label == "right")
df3_left   <- df3 %>% filter(pop_support_label == "left")
df3_all    <- df3 %>% filter(pop_support_label == "all")
df3_center <- df3 %>% filter(pop_support_label == "center")

# Prepare Dataset List
datasets <- list(
  right = df3_right,
  left = df3_left,
  all = df3_all,
  center = df3_center
)

# GAM Estimation Function ------------------------------------------------------
run_gam <- function(data) {
  gam(
    pop_add_m ~ s(ideo) +
      age + gen + edu + empl +
      corrupt_ind + econ_ind +
      factor(country_name),
    method = "REML",
    data = data
  )
}

# Run Models for All Groups ----------------------------------------------------
gam <- list()
for (name in names(datasets)) {
  gam[[name]] <- run_gam(datasets[[name]])
}

# Visualization: Figure 5 (4 Panels) -------------------------------------------
png("Figure5.png", width = 1200, height = 800)
par(mfrow = c(2, 2))  # 2x2 layout

plot(gam$right, select = 1, seWithMean = TRUE, shift = coef(gam$right)[1],
     shade = TRUE, xlab = "Ideology", ylab = "Populist Attitudes",
     main = "Right-Wing Populist Parties", cex.lab = 1.2, cex.axis = 1, rug = TRUE)

plot(gam$left, select = 1, seWithMean = TRUE, shift = coef(gam$left)[1],
     shade = TRUE, xlab = "Ideology", ylab = "", 
     main = "Left-Wing Populist Parties", cex.lab = 1.2, cex.axis = 1, rug = TRUE)

plot(gam$center, select = 1, seWithMean = TRUE, shift = coef(gam$center)[1],
     shade = TRUE, xlab = "Ideology", ylab = "Populist Attitudes",
     main = "Center Populist Parties", cex.lab = 1.2, cex.axis = 1, rug = TRUE)

plot(gam$all, select = 1, seWithMean = TRUE, shift = coef(gam$all)[1],
     shade = TRUE, xlab = "Ideology", ylab = "",
     main = "Diverse Populist Parties", cex.lab = 1.2, cex.axis = 1, rug = TRUE)

dev.off()

# Model Summaries --------------------------------------------------------------
summary(gam$right)
summary(gam$left)
summary(gam$center)
summary(gam$all)

# Table Output: Coefficients Comparison ----------------------------------------
stargazer::stargazer(
  gam$right, gam$left, gam$center, gam$all,
  type = "text",
  out = "Table6.txt",
  odd.ratio = TRUE,
  dep.var.labels = "Populist Attitudes",
  keep = c("age", "gen", "edu", "empl", "corrupt_ind", "econ_ind", "Constant"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  star.char = c("+", "*", "**", "***"),
  ci = TRUE,
  align = TRUE,
  notes = "Odds with 95% Confidence Intervals in parentheses. + p<0.1; * p<0.05; ** p<0.01; *** p<0.001",
  notes.append = FALSE
)

# Stop logging------------------------------------------------------------------
# Log session information
sessionInfo()
sink()